clear all
set more off


********************************************************************************
//// 04-PROCESS_OFFICER: CONSTRUCT CROSSWALK FROM CITATIONS TO OFFICER ID'S ////
********************************************************************************


*************************************************************
//// PART 2A: CONSTRUCT DATASET OF FHP EMPLOYMENT SPELLS ////
*************************************************************

/// GET AGENCY NUMBER FOR FLORIDA HIGHWAY PATROL ///
*insheet using "$RAWDATA/FDLE/agencies.csv", clear
import excel using "${raw}/FDLE/agencies.xlsx", clear first
summ agcy_nbr if regexm(agcy_name,"Highway Safety")==1
global FHP = r(mean)
/// NOTE: FHP NUMBER = 9 ///

/// READ IN EMPLOYMENT RECORDS ///
*insheet using "$RAWDATA/FDLE/employment.csv", clear
import excel using "${raw}/FDLE/employment.xlsx", clear first

/// KEEP ONLY FHP SPELLS //
keep if agcy_nbr==$FHP

/// KEEP ONLY LAW ENFORCEMENT SPELLS ///
keep if employ_class=="LE"

/// DROP IF SPELL ENDS PRIOR TO 2005 ///
*gen sepYear=substr(separation_date,-4,.)
*destring sepYear, replace force
gen sepYear = year(separation_date)
drop if (sepYear<2005)&!mi(sepYear)

/// IMPUTE A FAKE SEPARATION DATE OF JAN 1, 2016 IF MISSING (i.e. SPELL IN PROGRESS) ///
gen tempdate = date("1/1/2019","MDY")
format %td tempdate
replace separation_date = tempdate if mi(separation_date)


/// CONVERT START + SEPARATION DATES TO STATA DATE FORMAT ///
*gen start=date(employ_start_date,"MDY")
*format start %td
*gen end=date(separation_date,"MDY")
*format end %td

gen start = employ_start_date
format %td start
gen end = separation_date
format end %td

/// STORE SPELLS AS TEMPFILE ///
ren employ_type type
keep person_nbr type start end
tempfile SPELLS
save `SPELLS'


//// GET OFFICER NAMES FROM PERSON DATA ////
*insheet using "$RAWDATA/FDLE/person_data.csv", clear
import excel using "${raw}/FDLE/person_data.xlsx", clear first

/// CLEAN UP NAMES //
gen last=trim(lower(last_name))
gen first=trim(lower(first_name))
gen middle=trim(lower(middle_name))
replace middle="" if middle=="†††††††††††††††"

/// KEEP ONLY PERSON NUMBER AND NAMES ///
keep person_nbr last first middle


/// DUPLICATES? ///
duplicates drop 
duplicates drop person_nbr, force


//// MERGE NAMES WITH SPELLS ///
merge 1:m person_nbr using `SPELLS', keep(3) nogen

/// DUPLICATES? ///
gsort person_nbr start -end
duplicates drop person_nbr start, force

//// STORE ////
sort person_nbr start
qui compress
saveold "${temp}/FHP-spells", replace




******************************************************************
//// PART 2B: STORE CASES WHERE OFFICER LAST NAME IDENTIFIES ////
//// THE TICKETING OFFICER PERFECTLY                         ////
*****************************************************************

/// IDENTIFY CASES WHERE THERE IS ONLY A SINGLE CANDIDATE OFFICER WITH A GIVEN LAST NAME ///
use "${temp}/FHP-spells", clear
duplicates drop person_nbr, force
bysort last: egen nPerson=sum(1)
keep if nPerson==1
keep person_nbr last
tempfile LIST
save `LIST'

/// IDENTIFY PERFECT MATCHES IN CITATIONS DATA TO THESE LAST NAMES ///
use "${temp}/process-citation", clear
// KEEP ONLY FHP //
keep if IssueAgencyType==1
keep CitationNumber CountyNum OffenseDate OfficerFirst OfficerMiddle OfficerLast

* MERGE ON OFFICER LAST NAME *
gen last=trim(lower(OfficerLast))
merge m:1 last using `LIST'

/// STORE MATCHES ///
preserve
keep if _m==3
keep CitationNumber CountyNum OffenseDate person_nbr
qui compress
saveold "${temp}/OFFICER-uniquematched", replace
restore 

/// NOW STORE THOSE NOT MATCHING ///
keep if _m==1
drop person_nbr _merge
qui compress
saveold "${temp}/OFFICER-uniqueunmatched", replace



******************************************************************************************
//// PART 2C: USE JOINBY COMMAND TO GENERATE ALL POSSIBLE MATCHES                    /////
//// BETWEEN REMAINING UNMATCHED STOPS AND CANDIDATE OFFICERS WITH NON-UNIQUE NAMES  /////
******************************************************************************************

/// CASES IN FHP SPELLS WHERE 2+ OFFICERS HAVE SAME LAST NAME ///
use "${temp}/FHP-spells", clear
duplicates drop person_nbr, force
bysort last: egen nPerson = sum(1)
keep if nPerson>1
keep person_nbr
merge 1:m person_nbr using "${temp}/FHP-spells", nogen keep(3)
tempfile LIST
save `LIST'

/// JOINBY ///
use "${temp}/OFFICER-uniqueunmatched", clear
joinby last using `LIST'
qui compress



*******************************************************************
//// PART 2D: IDENTIFY VALID MATCHES FROM CANDIDATES IN JOINBY ////
*******************************************************************

/// HOW THIS STEP WORKS: 
/// FOR EACH CRITERIA (e.g. FULL NAME), CODE PRESENCE OF MATCH BETWEEN STOPS AND SPELLS ///
/// IF THERE IS A MATCH FOR A GIVEN STOP, DROP UNMATCHED FOR THAT STOP ///
/// COUNT NUMBER OF MATCHES REMAINING FOR GIVEN STOP ///
/// IF THAT COUNT IS 1, CALL IT A MATCH AND STORE ////
/// IF THAT COUNT IS >1 OR THERE IS NO MATCH, MOVE ON TO NEXT STEP USING REMAINING UNMATCHED ///


/// CREATE NUMERIC STOPID //
egen StopId = group(CitationNumber CountyNum OffenseDate)

/// STEP I: ELIMINATE IF CITATION DATE IS OUTSIDE MATCHED SPELL ///
gen InRange = (OffenseDate >= start) & (OffenseDate <= end)
keep if InRange==1
drop InRange
bysort StopId: egen nObs = sum(1)

// STORE AS MATCH IF ONLY ONE CANDIDATE REMAINING //
preserve
keep if nObs==1
keep CitationNumber CountyNum OffenseDate person_nbr
qui compress
saveold "${temp}/OFFICER-uniqueunmatched-1", replace
restore
// RETAIN STILL UNMATCHED FOR FUTURE STEPS //
drop if nObs==1
drop nObs


/// STEP II: FULL NAME MATCHES ///
* FULL OFFICER NAME IN CITATIONS *
gen full_cite = trim(lower(OfficerF))+" "+trim(lower(OfficerM))+" "+trim(lower(OfficerL))
* FUlL OFFICER NAME IN PERSONEL DATA *
gen full_person = first+" "+middle+" "+last
* FLAG FOR MATCH ACROSS DATASETS *
gen full_match = (full_cite==full_person) 

bysort StopId: egen any_full_match = max(full_match)
drop if any_full_match==1 & full_match==0
drop any_full_match

// STORE AS MATCH IF ONLY CANDIDATE REMAINING //
bysort StopId: egen nObs = sum(1)
preserve
keep if nObs==1
keep CitationNumber CountyNum OffenseDate person_nbr
qui compress
saveold "${temp}/OFFICER-uniqueunmatched-2", replace
restore
// RETAIN STILL UNMATCHED FOR FUTURE STEPS //
drop if nObs==1
drop nObs



/// STEP III: FIRST NAME MATCH ////
gen cite_first = trim(lower(OfficerFirst))
gen first_match = (cite_first==first) & !mi(cite_first)
bysort StopId: egen any_first_match = max(first_match)
drop if any_first_match==1 & first_match==0
drop any_first_match

// STORE AS MATCH IF ONLY CANDIDATE REMAINING //
bysort StopId: egen nObs=sum(1)
preserve
keep if nObs==1
keep CitationNumber CountyNum OffenseDate person_nbr
qui compress
saveold "${temp}/OFFICER-uniqueunmatched-3", replace
/// RETAIN STILL UNMATCHED FOR FUTURE STEPS //
restore
drop if nObs==1
drop nObs


/// STEP IV: FIRST INITIAL MATCH MATCH ////
gen cite_fi=substr(cite_first,1,1)
gen person_fi=substr(first,1,1)
gen fi_match=(cite_fi==person_fi)&!mi(cite_fi)
bysort StopId: egen any_fi_match=max(fi_match)
drop if any_fi_match==1&fi_match==0
drop any_fi_match

// STORE AS MATCH IF ONLY CANDIDATE REMAINING //
bysort StopId: egen nObs=sum(1)
preserve
keep if nObs==1
keep CitationNumber CountyNum OffenseDate person_nbr
qui compress
saveold "${temp}/OFFICER-uniqueunmatched-4", replace
/// RETAIN STILL UNMATCHED FOR FUTURE STEPS //
restore
drop if nObs==1
drop nObs


/// STEP V: COMPILE MATCHES IN PREVIOUS STEPS INTO 1 FILE ///

use "${temp}/OFFICER-uniqueunmatched-1", clear
forval i=2/4 {
qui append using "${temp}/OFFICER-uniqueunmatched-`i'"
}
qui compress
saveold "${temp}/OFFICER-uniqueunmatched-matched", replace

/// DELETE STEPS ///
forval i=1/4 {
rm "${temp}/OFFICER-uniqueunmatched-`i'.dta"
}




*******************************************************
//// PART 2E: COMPILE ALL MATCHES FROM PRIOR STEPS ////
*******************************************************

use "${temp}/OFFICER-uniquematched", clear
qui append using "${temp}/OFFICER-uniqueunmatched-matched"
drop OffenseDate
qui compress
saveold "${temp}/step-1-officer", replace


/// DELETE TEMPORARY FILES ///
rm "${temp}/FHP-spells.dta"
rm "${temp}/OFFICER-uniquematched.dta"
rm "${temp}/OFFICER-uniqueunmatched.dta"
rm "${temp}/OFFICER-uniqueunmatched-matched.dta"

























